######## Attitudes to crime and punishment in England and Wales, 1964-2023
######## 001. Analysis_Trends


library(tidyverse)
library(gt)

Data <- readRDS(file = "/Users/matteo/Desktop/Prison Polling/AttitudesToCrimeAndPunishment.RData")

Data[[1]] <- Data[[1]][Data[[1]]$Demographic == "All adults",]
Data[[2]] <- Data[[2]][Data[[2]]$Demographic == "All adults",]
Data[[3]] <- Data[[3]][Data[[3]]$Demographic == "All adults",]
Data[[4]] <- Data[[4]][Data[[4]]$Demographic == "All adults",]


#### Dyad Ratio analysis

## Full sample table

Data[[1]] %>% 
  mutate(Year = as.numeric(substr(Date,1,4))) %>% 
  filter(Year >= 1965) %>%
  group_by(Varname, Year) %>%
  summarise(Index = mean(Index)) %>%
  ungroup() %>%
  group_by(Varname) %>% 
  filter(n()>1) -> CrimeConcern
length(unique(CrimeConcern$Varname)) -> crimeqs
nrow(CrimeConcern) -> crimecases
Data[[2]] %>% 
  mutate(Year = as.numeric(substr(Date,1,4))) %>% 
  filter(Year >= 1981) %>%
  group_by(Varname, Year) %>%
  summarise(Index = mean(Index)) %>%
  ungroup() %>%
  group_by(Varname) %>% 
  filter(n()>1) -> Punitiveness
length(unique(Punitiveness$Varname)) -> punqs
nrow(Punitiveness) -> puncases
Data[[3]] %>% 
  mutate(Year = as.numeric(substr(Date,1,4))) %>% 
  filter(Year >= 1964) %>%
  group_by(Varname, Year) %>%
  summarise(Index = mean(Index)) %>%
  ungroup() %>%
  group_by(Varname) %>% 
  filter(n()>1) -> DeathPen
length(unique(DeathPen$Varname)) -> deathqs
nrow(DeathPen) -> deathcases
Data[[4]] %>% 
  mutate(Year = as.numeric(substr(Date,1,4))) %>% 
  filter(Year >= 1973) %>%
  group_by(Varname, Year) %>%
  summarise(Index = mean(Index)) %>%
  ungroup() %>%
  group_by(Varname) %>% 
  filter(n()>1) -> MIP
length(unique(MIP$Varname)) -> priqs
nrow(MIP) -> pricases

tribble(~`Variable`, ~`Crime Concern`, ~`Punitiveness`, ~`Death penalty`,~Prioritisation,
        "Question series",as.character(crimeqs),as.character(punqs),as.character(deathqs),as.character(priqs),
        "Survey-year pairs",as.character(crimecases-1),as.character(puncases-1),as.character(deathcases-1),as.character(pricases-1),
        "Years",'1965 - 2023','1981 - 2023','1962 - 2023','1973 - 2023') %>%
  gt(rowname_col = "Variable") %>%
  tab_header(title = md("**Table 2: Full sample of polling data for England and Wales**")) %>%
  cols_align(align = "center", columns = c(2:5)) %>%
  cols_width(Variable ~ pct(20), `Crime Concern` ~ pct(20), `Punitiveness` ~ pct(20), `Death penalty` ~ pct(20), Prioritisation ~ pct(20)) %>%
  tab_source_note(source_note = "Notes: Data taken from the British Election Study, British Social Attitudes Survey, the Crime Survey for England and Wales, YouGov, Gallup and Ipsos.")
#gtsave(filename = "table_2.html", path = "/Users/matteo/Downloads")
rm(CrimeConcern, Punitiveness, DeathPen, MIP, crimeqs,punqs,deathqs,priqs,crimecases,puncases,deathcases,pricases)
# NB This table contains just data points used in the analysis i.e. polls aggregated by year


## Concern

Data[[1]] %>% mutate(Date = as.Date(Date)) -> concern
output <- DyadRatios::extract(concern$Varname, concern$Date, concern$Index,
                              unit="A", begindt=NA, enddt=NA,
                              smoothing=TRUE, verbose = TRUE)
# Percent Variance Explained:  55.97
# Number of usable series: 60 (questions), 1965 - 2023
# Final Weighted Average Metric:  Mean:  0.42  St. Dev:  0.06

# Figure 1: concern of crime in England and Wales, 1965 - 2023
plot(x = output$period, y = output$latent1, type = "l", 
     xlab = NA, ylab = "Latent trend",
     xlim = c(1960,2023))
Trends <- data.frame(Year = output$period, CrimeConcern = output$latent1)
# Grew by 43% from 1960s to early 1990s. Since then has fallen back by the same amount


# Check against individual trends

Data[[1]] %>%
  filter(str_detect(Varname, c("^GP_SocialProblem|^CSEW_worry|^CSEW_safe"))) %>%
  mutate(Group = case_when(
    str_detect(Varname, c("^GP_SocialProblem")) ~ "Gallup: Social problems...",
    str_detect(Varname, c("^CSEW_worry")) ~ "CSEW: Worried about...",
    str_detect(Varname, c("^CSEW_safe")) ~ "CSEW: Do not feel safe...")) %>%
  mutate(Year = as.numeric(substr(Date,1,4))) %>% 
  filter(Year >= 1965) %>%
  group_by(Varname, Year) %>%
  summarise(Index = mean(Index), Group = Group) %>%
  mutate(Group = factor(Group, levels = c("Gallup: Social problems...", "CSEW: Do not feel safe...", "CSEW: Worried about..."))) %>%
  ungroup() %>%
  group_by(Varname) %>% 
  filter(n()>1) %>%
  ggplot(aes(x = Year, y = Index, group = Varname)) +
  geom_line() + facet_wrap(facets = vars(Group), nrow = 1) +
  labs(title = "Selected underlying question series for Crime Concern estimate") +
  MyThemes::theme_base() + theme(legend.position = "none")

# Jackknife 

results <- tibble(Year = output$period, Latent = output$latent1, Version = "Full")
listofvars <- output$varname
jackknife <- function (x){
  Data[[1]] %>% filter(Varname != x) %>% mutate(Date = as.Date(Date)) -> temp
  out <- DyadRatios::extract(temp$Varname, temp$Date, temp$Index,
                 unit="A", mult=1, begindt=NA, enddt=NA, npass=1,
                 smoothing=TRUE, endmonth=12)
  vers <- paste0("without ", x)
  result <- tibble(Year = out$period, Latent = out$latent1, Version = vers)
  return(result)
}
jacks <- lapply(listofvars, jackknife)
results <- bind_rows(results, jacks)
ggplot(results, aes(x = Year, y = Latent, group = Version)) +
  geom_line(colour = 'grey') + geom_line(data = results[results$Version == "Full",], colour = 'blue', linewidth = 1.25) +
  ylab("Index") + 
  labs(title = "Jackknife estimates for Crime Concern (the blue line marks the full sample)") +
  MyThemes::theme_base()

rm(output, results, listofvars, concern, csign, jacks, temp, jackknife)



## Punitiveness

Data[[2]] %>% mutate(Date = as.Date(Date)) -> Punish
output2 <- DyadRatios::extract(Punish$Varname[Punish$Date >= as.Date("1978-01-01")],
                               Punish$Date[Punish$Date >= as.Date("1978-01-01")],
                               Punish$Index[Punish$Date >= as.Date("1978-01-01")],
                               unit="A", begindt=NA, enddt=NA,
                               smoothing=TRUE, verbose = TRUE)
output3 <- DyadRatios::extract(Punish$Varname[Punish$Date >= as.Date("1979-01-01")],
                               Punish$Date[Punish$Date >= as.Date("1979-01-01")],
                               Punish$Index[Punish$Date >= as.Date("1979-01-01")],
                               unit="A", begindt=NA, enddt=NA,
                               smoothing=TRUE, verbose = TRUE)
output4 <- DyadRatios::extract(Punish$Varname[Punish$Date >= as.Date("1980-01-01")],
                               Punish$Date[Punish$Date >= as.Date("1980-01-01")],
                               Punish$Index[Punish$Date >= as.Date("1980-01-01")],
                               unit="A", begindt=NA, enddt=NA,
                               smoothing=TRUE, verbose = TRUE)
# Percent Variance Explained:  58.53
# Number of usable series: 43 (questions), 1981 - 2023
# Final Weighted Average Metric:  Mean:  0.57  St. Dev:  0.04

# Figure 2: Punitiveness in England and Wales, 1978 - 2023
plot(x = output2$period, y = output2$latent1, type = "l", lty = 1,
     xlab = NA, ylab = "Latent trend", ylim = c(0.48,0.64),
     xlim = c(1975,2023)) +
lines(x = output3$period, y = output3$latent1, type = "l", lty = 2)+
lines(x = output4$period, y = output4$latent1, type = "l", lty = 3) +# stabilise after here
text(x = c(1978, 1979, 1981), c(output2$latent1[[1]], output3$latent1[[1]], output4$latent1[[1]]),
     labels=c(1978, 1979, 1981), pos=c(1,2,1))
# One variable in 1960 and 1965 (Gallup Sentencing Principle)
# Another 1978 and 1978 and 1981 (Gallup Government Longer Sentences)
# Both trending upwards over time
# Disparity then caused by the BES_stiffsentence variable in 1979 
# 91% of pop wanted longer sentences falls to 76% next time question was asked in 1983
# Question is also framed slightly differently. 
# In 1979 asks whether it is ver/fairly important that it should be done
# Thereafter, asks whether they agree (strongly etc) that it should be done.
# So probably best to start the series in 1981 where we have 3 survey questions

plot(x = output4$period, y = output4$latent1, type = "l", lty = 3,
     xlab = NA, ylab = "Latent trend",
     xlim = c(1980,2023), main = "Punitiveness in England and Wales, 1981 - 2023")+
segments(x0 = 1980, y0 = mean(output4$latent1[1:9]), x1 = 1990, 
         y1 = mean(output4$latent1[1:9]))+
segments(x0 = 1990, y0 = mean(output4$latent1[10:19]), x1 = 2000, 
         y1 = mean(output4$latent1[10:19]))+
segments(x0 = 2000, y0 = mean(output4$latent1[20:29]), x1 = 2010, 
         y1 = mean(output4$latent1[20:29]))+
text(x = c(1985, 1995, 2005), y = c(mean(output4$latent1[1:9]), mean(output4$latent1[10:19]), mean(output4$latent1[20:29])),
     labels=c(0.57, 0.58, 0.60), pos=1)
# Extensive variation in the 1980s
# before settling at more stable and high level through 1990s and 2000s
# 5% increase 1980s to 2000s, since then 23% decline from peak

Trends <- full_join(Trends, data.frame(Year = output4$period, Punitiveness = output4$latent1))

# Check against individual trends

Data[[2]] %>%
  filter(str_detect(Varname, c("^BES_stiffsentenceBES|^BSA_obey|^BSA_wron|^BSA_stifsen|^CSEW_senttoo|^CSEW_juv|^CSEW_crimcau"))) %>%
  mutate(Group = case_when(
    str_detect(Varname, c("^BES_stiffsentenceBES")) ~ "BES: Stiffer sentences",
    str_detect(Varname, c("^BSA_wron|^BSA_stif|^BSA_obey")) ~ "BSA: Stiffer sentences, always obey law",
    str_detect(Varname, c("^CSEW_senttoo|^CSEW_juv|^CSEW_crimcau")) ~ "CSEW: Sentencing too lenient")) %>%
  mutate(Year = as.numeric(substr(Date,1,4))) %>% 
  filter(Year >= 1981) %>%
  group_by(Varname, Year) %>%
  summarise(Index = mean(Index), Group = Group) %>%
  mutate(Group = factor(Group, levels = c("BES: Stiffer sentences", "BSA: Stiffer sentences, always obey law", "CSEW: Sentencing too lenient"))) %>%
  ungroup() %>%
  group_by(Varname) %>% 
  filter(n()>1) %>%
  ggplot(aes(x = Year, y = Index, group = Varname)) +
  geom_line() + facet_wrap(facets = vars(Group), nrow = 1) +
  labs(title = "Selected underlying question series for Punitiveness estimate") +
  MyThemes::theme_base() + theme(legend.position = "none")

Data[[2]] %>%
  mutate(Year = as.numeric(substr(as.character(Date),1,4))) %>%
  filter(Year > 1975 & Year < 1991) %>% 
  arrange(Year) %>%
  group_by(Varname) %>%
  filter(n()>1) %>%
  mutate(Group = factor(ifelse(first(Index) < last(Index), "Increasing", "Decreasing"),
                        levels = c("Decreasing","Increasing"))) %>%
  group_by(Group, Varname) %>%
  summarise(Change = ((last(Index)-first(Index))/first(Index)),
            Index = list(Index)) %>%
  select(Varname, Index, Change) %>%
  arrange(Change) %>%
  gt(rowname_col = "Varname") %>%
  tab_header(title = md("**Table 4: Individual punitiveness questions, 1975-1990**")) %>%
  fmt_percent(decimals = 0) %>%
  cols_align(align = "center", columns = c('Index','Change')) %>%
  gtExtras::gt_plt_sparkline(Index, same_limit = FALSE)


# Jackkife

results <- tibble(Year = output4$period, Latent = output4$latent1, Version = "Full")
listofvars <- output4$varname
jackknife <- function (x){
  Data[[2]] %>% filter(Varname != x) %>% mutate(Date = as.Date(Date)) %>% filter(Date >= as.Date("1980-01-01")) -> temp
  out <- DyadRatios::extract(temp$Varname, temp$Date, temp$Index,
                             unit="A", mult=1, begindt=NA, enddt=NA, npass=1,
                             smoothing=TRUE, endmonth=12)
  vers <- paste0("without ", x)
  result <- tibble(Year = out$period, Latent = out$latent1, Version = vers)
  return(result)
}
jacks <- lapply(listofvars, jackknife)
results <- bind_rows(results, jacks)
ggplot(results, aes(x = Year, y = Latent, group = Version)) +
  geom_line(colour = 'grey') + geom_line(data = results[results$Version == "Full",], colour = 'blue', linewidth = 1.25) +
  ylab("Index") + 
  labs(title = "Jackknife estimates for Punitiveness (the blue line marks the full sample)") +
  MyThemes::theme_base()

rm(output2,output3,output4,Punish,csign,listofvars,jackknife,jacks,results)



###### Extensions,

#### Death Penalty

Data[[3]] %>%
  mutate(Date = as.Date(Date)) %>%
  filter(Date >= as.Date("1964-01-01")) -> DeathPen
output <- DyadRatios::extract(DeathPen$Varname, DeathPen$Date, DeathPen$Index,
                              unit="A", begindt=NA, enddt=NA,
                              smoothing=TRUE, verbose = TRUE)
# Percent Variance Explained:  66.84
# Number of usable series:  24 (questions), 1964 - 2023
# Final Weighted Average Metric:  Mean:  0.6  St. Dev:  0.04
output1 <- DyadRatios::extract(DeathPen$Varname[DeathPen$Date >= as.Date("1981-01-01")],
                               DeathPen$Date[DeathPen$Date >= as.Date("1981-01-01")],
                               DeathPen$Index[DeathPen$Date >= as.Date("1981-01-01")],
                               unit="A", begindt=NA, enddt=NA,
                               smoothing=TRUE, verbose = TRUE)
# Percent Variance Explained:  62.97
# Number of usable series:  22 (questions), 1981 - 2023
# Final Weighted Average Metric:  Mean:  0.54  St. Dev:  0.04

# Figure 3: Support for the death penalty in England and Wales, 1964 - 2023
plot(x = output$period, y = output$latent1, type = "l", 
     xlab = NA, ylab = "Latent trend", ylim = c(0.54,0.71),
     xlim = c(1959,2023)) +
lines(x = output1$period, y = output1$latent1, type = "l", lty = 2)+
text(x = c(1964, 1981), y = c(output$latent1[[1]], output1$latent1[[1]]+0.01),
     labels = c(1964,1981), pos = 2)
# steady fall of 15% in support for death penalty over past 60 years
Trends <- full_join(Trends, data.frame(Year = output$period, DeathPen1 = output$latent1))
rm(output, output1, DeathPen,csign)


# Check against individual trends

Data[[3]] %>%
  filter(Varname %in% c("BES_deathpenBES","BSA_deathapp")) %>%
  mutate(Varname = ifelse(Varname == "BES_deathpenBES","BES: Keep the death penalty",
                          "BSA: Death penalty is appropriate")) %>%
  mutate(Year = as.numeric(substr(Date,1,4))) %>% 
  filter(Year >= 1961) %>%
  group_by(Varname, Year) %>%
  summarise(Index = mean(Index), Group = Varname) %>%
  mutate(Group = factor(Group, levels = c("BES: Keep the death penalty", "BSA: Death penalty is appropriate"))) %>%
  ungroup() %>%
  group_by(Varname) %>% 
  filter(n()>1) %>%
  ggplot(aes(x = Year, y = Index, group = Group)) +
  geom_line() + facet_wrap(facets = vars(Group), nrow = 1) +
  labs(title = "Selected underlying question series for Death Penalty estimate") +
  MyThemes::theme_base() + theme(legend.position = "none")


# Jackknife

results <- tibble(Year = output$period, Latent = output$latent1, Version = "Full")
listofvars <- output$varname
jackknife <- function (x){
  Data[[3]] %>% filter(Varname != x) %>% mutate(Date = as.Date(Date)) %>% filter(Date >= as.Date("1964-01-01")) -> temp
  out <- DyadRatios::extract(temp$Varname, temp$Date, temp$Index,
                             unit="A", mult=1, begindt=NA, enddt=NA, npass=1,
                             smoothing=TRUE, endmonth=12)
  vers <- paste0("without ", x)
  result <- tibble(Year = out$period, Latent = out$latent1, Version = vers)
  return(result)
}
jacks <- lapply(listofvars, plyr::failwith(f = jackknife))
results <- bind_rows(results, jacks)
ggplot(results, aes(x = Year, y = Latent, group = Version)) +
  geom_line(colour = 'grey') + geom_line(data = results[results$Version == "Full",], colour = 'blue', linewidth = 1.25) +
  ylab("Index") + 
  labs(title = "Jackknife estimates for Death Penalty (the blue line marks the full sample)") +
  MyThemes::theme_base()



###### Prioritisation of crime as a social issue

Data[[4]] %>%
  mutate(Varname = factor(Varname, levels = c("Gallup (MUP)","Ipsos (MII)",
                                              "BSA (GSP)","YouGov (MII)","BES (MII)")),
         Year = as.numeric(substr(Date,1,4))) %>%
  ggplot(aes(x = Year, y = Index)) +
  geom_line() + MyThemes::theme_base() +
  facet_grid(rows = vars(Varname)) +
  xlab(NULL) + ylab(NULL) +
  labs(title = "Question series of Prioritisation")

Data[[4]] %>% mutate(Date = as.Date(Date)) -> Priority
output <- DyadRatios::extract(Priority$Varname, Priority$Date, Priority$Index,
                              unit="A", begindt=NA, enddt=NA,
                              smoothing=TRUE, verbose = TRUE)
# Percent Variance Explained:  73.72
# Number of usable series: 5 (questions), 1973-2023
# Final Weighted Average Metric:  Mean:  0.05  St. Dev:  0.03

# Figure 4: Public prioritisation of crime, 1973 - 2023
plot(x = output$period, y = output$latent1, type = "l", 
     xlab = NA, ylab = "Latent trend",
     xlim = c(1970,2023))
# 8% increase between 1973 and 2010, then 8% decline since then
Trends <- full_join(Trends, data.frame(Year = output$period, MIP = output$latent1))
rm(output,csign, Priority)

Trends |> 
  arrange(Year) |> 
  write.csv(file  = "/Users/matteo/Desktop/Prison Polling/LatentTrendsAllAdults.csv")
